#################################################################################################
#Table A.13
#This table presents heterogeneous treatment effects by implementation quality
#################################################################################################
#PaP model = rating ~ fps distance + unsuccessful trips + collection time + overcharges + relationship variables (length, caste, religion)
#relevant variables
#fps rating = inter_el1_analysis_data$c34_opinion_fps
#fps given distance = c2_far_fps
#unsuccessful trips = c15_mar_unsucc_fps c16_april_unsucc_fps c17_may_unsucc_fps
#collection time = c5_go_hrs c5_go_min
#overcharged for ration = c42_mar_addi_pay c44_april_addi_pay c46_may_addi_pay
#length of relationship with dealer = d1_know_deal_yy + d1_know_deal_mm
#same caste = d9_deal_caste_same_y0
#same religion = d7_deal_religion_same_y0

#read data
inter_bl_analysis_data <- readstata13::read.dta13(paste0(SurveyDataDir, "Single_BL_HH_Dataset_For_Validation.dta"))
inter_el1_analysis_data <- readstata13::read.dta13(paste0(SurveyDataDir, "Single_EL_HH_Dataset_For_Validation.dta"))
final_analysis_data <- readstata13::read.dta13(paste0(SurveyDataDir, "JH_ePOS_HH_DataforAnalysis.dta"))

#clean variables
inter_bl_analysis_data$pds_rating <- NA
inter_bl_analysis_data$pds_rating[inter_bl_analysis_data$c53_satis_pds == "Extremely Dissatisfied"] <- 1
inter_bl_analysis_data$pds_rating[inter_bl_analysis_data$c53_satis_pds == "Dissatisfied"] <- 2
inter_bl_analysis_data$pds_rating[inter_bl_analysis_data$c53_satis_pds == "Indifferent"] <- 3
inter_bl_analysis_data$pds_rating[inter_bl_analysis_data$c53_satis_pds == "Satisfied"] <- 4
inter_bl_analysis_data$pds_rating[inter_bl_analysis_data$c53_satis_pds == "Extremely Satisfied"] <- 5
inter_bl_analysis_data$pds_rating[inter_bl_analysis_data$c53_satis_pds == "Did not answer"] <- NA
inter_bl_analysis_data$pds_rating[inter_bl_analysis_data$c53_satis_pds == "Did not know"] <- NA
inter_bl_analysis_data$c2_far_fps[inter_bl_analysis_data$c2_far_fps < 0] <- NA
inter_bl_analysis_data$c15_mar_unsucc_fps[inter_bl_analysis_data$c15_mar_unsucc_fps < 0] <- NA
inter_bl_analysis_data$c16_april_unsucc_fps[inter_bl_analysis_data$c16_april_unsucc_fps < 0] <- NA
inter_bl_analysis_data$c17_may_unsucc_fps[inter_bl_analysis_data$c17_may_unsucc_fps < 0] <- NA
inter_bl_analysis_data$c5_go_hrs[inter_bl_analysis_data$c5_go_hrs < 0] <- NA
inter_bl_analysis_data$c5_go_min[inter_bl_analysis_data$c5_go_min < 0] <- NA
inter_bl_analysis_data$fps_trip_time <- inter_bl_analysis_data$c5_go_hrs + (inter_bl_analysis_data$c5_go_min/60)
inter_bl_analysis_data$mar_overcharge <- NA
inter_bl_analysis_data$mar_overcharge[inter_bl_analysis_data$c42_mar_addi_pay == "Did not answer"] <- NA
inter_bl_analysis_data$mar_overcharge[inter_bl_analysis_data$c42_mar_addi_pay == "Did not know"] <- NA
inter_bl_analysis_data$mar_overcharge[inter_bl_analysis_data$c42_mar_addi_pay == "Yes"] <- 1
inter_bl_analysis_data$mar_overcharge[inter_bl_analysis_data$c42_mar_addi_pay == "No"] <- 0
inter_bl_analysis_data$april_overcharge <- NA
inter_bl_analysis_data$april_overcharge[inter_bl_analysis_data$c44_april_addi_pay == "Did not answer"] <- NA
inter_bl_analysis_data$april_overcharge[inter_bl_analysis_data$c44_april_addi_pay == "Did not know"] <- NA
inter_bl_analysis_data$april_overcharge[inter_bl_analysis_data$c44_april_addi_pay == "Yes"] <- 1
inter_bl_analysis_data$april_overcharge[inter_bl_analysis_data$c44_april_addi_pay == "No"] <- 0
inter_bl_analysis_data$may_overcharge <- NA
inter_bl_analysis_data$may_overcharge[inter_bl_analysis_data$c46_may_addi_pay == "Did not answer"] <- NA
inter_bl_analysis_data$may_overcharge[inter_bl_analysis_data$c46_may_addi_pay == "Did not know"] <- NA
inter_bl_analysis_data$may_overcharge[inter_bl_analysis_data$c46_may_addi_pay == "Yes"] <- 1
inter_bl_analysis_data$may_overcharge[inter_bl_analysis_data$c46_may_addi_pay == "No"] <- 0
inter_bl_analysis_data$d1_know_deal_yy[inter_bl_analysis_data$d1_know_deal_yy < 0] <- NA
inter_bl_analysis_data$d1_know_deal_mm[inter_bl_analysis_data$d1_know_deal_mm < 0] <- NA
#6 cases where it's coded as 88 and 89 instead of -88 and -89
inter_bl_analysis_data$d1_know_deal_yy[inter_bl_analysis_data$d1_know_deal_yy > 60] <- NA
inter_bl_analysis_data$d1_know_deal_mm[inter_bl_analysis_data$d1_know_deal_mm > 60] <- NA
inter_bl_analysis_data$know_deal_length <- inter_bl_analysis_data$d1_know_deal_yy + (inter_bl_analysis_data$d1_know_deal_mm/12)
inter_bl_analysis_data$deal_caste_same <- NA
inter_bl_analysis_data$deal_caste_same[inter_bl_analysis_data$d7_deal_religion_same == "Did not answer"] <- NA
inter_bl_analysis_data$deal_caste_same[inter_bl_analysis_data$d7_deal_religion_same == "Did not know"] <- NA
inter_bl_analysis_data$deal_caste_same[inter_bl_analysis_data$d7_deal_religion_same == "Yes"] <- 1
inter_bl_analysis_data$deal_caste_same[inter_bl_analysis_data$d7_deal_religion_same == "No"] <- 0
inter_bl_analysis_data$deal_religion_same <- NA
inter_bl_analysis_data$deal_religion_same[inter_bl_analysis_data$d9_deal_caste_same == "Did not answer"] <- NA
inter_bl_analysis_data$deal_religion_same[inter_bl_analysis_data$d9_deal_caste_same == "Did not know"] <- NA
inter_bl_analysis_data$deal_religion_same[inter_bl_analysis_data$d9_deal_caste_same == "Yes"] <- 1
inter_bl_analysis_data$deal_religion_same[inter_bl_analysis_data$d9_deal_caste_same == "No"] <- 0

imp_quality_data <- inter_bl_analysis_data

#reshape dataframe
imp_quality_data <- reshape(imp_quality_data, 
                            varying = c("mar_overcharge", "c15_mar_unsucc_fps", "april_overcharge", "c16_april_unsucc_fps","may_overcharge","c17_may_unsucc_fps"),
                            v.names = c("overcharge", "unsucc_trips"),
                            timevar = c("time"),
                            times = c("mar", "april", "may"),
                            direction = "long",
                            idvar = "uid")


#run reg on subjective variable
subj_qual <- miceadds::lm.cluster(data = imp_quality_data, pds_rating ~ c2_far_fps + unsucc_trips + fps_trip_time +

                          overcharge + know_deal_length + deal_caste_same + deal_religion_same, cluster = "block_code")

#predict subjective variable
imp_quality_data$pred_imp_qual <- predict(subj_qual$lm_res, imp_quality_data)

#aggregate subjective variable by fps
fps_quality <- aggregate(imp_quality_data[,c("pred_imp_qual")],by = list(FPS = imp_quality_data$fps_code), FUN = base::mean, na.rm = T)
names(fps_quality) <- c("fps_code", "pred_imp_qual")

#create an indicator where 1 = FPS above median, 0 = FPS below median
fps_quality$abv_med_imp <- 0
fps_quality$abv_med_imp[fps_quality$pred_imp_qual >= median(fps_quality$pred_imp_qual)] <- 1

#create indicator for individual beneficiaries based on FPS indicator
imp_quality_data$above_median_imp_qual <- NA

return_ind <- function(x){
  ori_spot <- which(fps_quality$fps_code %in% x)[1]
  return(fps_quality$abv_med_imp[ori_spot])
}

imp_quality_data$above_median_imp_qual <- sapply(imp_quality_data$fps_code, return_ind)

#merge to analysis data
vars_to_keep <- c("above_median_imp_qual", "uid")
imp_quality_data <- imp_quality_data[vars_to_keep]
imp_quality_data <- imp_quality_data[!duplicated(imp_quality_data$uid),]

het_analysis_data <- merge(final_analysis_data, imp_quality_data, by = "uid")

#want to run analysis on the following outcomes
#value received = value_total_mar17 value_total_feb17 value_total_jan17, baseline value = value_total_y0
#value received (WTA) = c6_WTA_mar17, c6_WTA_feb17, c6_WTA_jan17 no baseline
#transaction cost = c_total_access_cost_adj_mar17, baseline = c_cost_to_access_y0 #drop ghosts (ghost_final) here

#create replace missing baseline with 
het_analysis_data$bl_value_total <- het_analysis_data$value_total_y0
het_analysis_data$bl_value_total_mi <- 0
het_analysis_data$bl_value_total_mi[is.na(het_analysis_data$value_total_y0)] <- 1
het_analysis_data$bl_value_total[het_analysis_data$bl_value_total_mi == 1] <- mean(het_analysis_data$value_total_y0, na.rm = T)

het_analysis_data$bl_access_cost <- het_analysis_data$c_cost_to_access_y0
het_analysis_data$bl_access_cost_mi <- 0
het_analysis_data$bl_access_cost_mi[is.na(het_analysis_data$c_cost_to_access_y0)] <- 1
het_analysis_data$bl_access_cost[het_analysis_data$bl_access_cost_mi == 1] <- mean(het_analysis_data$c_cost_to_access_y0, na.rm = T)


het_analysis_data_long <- reshape(het_analysis_data, 
                                  varying = c("value_total_mar17",  "c6_WTA_mar17","value_total_feb17", "c6_WTA_feb17","value_total_jan17","c6_WTA_jan17"),
                                  v.names = c("value_total", "wta"),
                                  timevar = c("time"),
                                  times = c("mar", "feb", "jan"),
                                  direction = "long")

#run reg

#generate interaction variable
het_analysis_data_long$TXimp <- het_analysis_data_long$treatment * het_analysis_data_long$above_median_imp_qual
het_analysis_data$TXimp <- het_analysis_data$treatment * het_analysis_data$above_median_imp_qual

#set as survey design
impDesignLong <- survey::svydesign(id      = ~block_code,
                           strata  = ~strata,
                           weights = ~pweight,
                           data    = het_analysis_data_long)

het_analysis_data <- subset(het_analysis_data, het_analysis_data$ghost_final != 1)
impDesign <- survey::svydesign(id      = ~block_code,
                       strata  = ~strata,
                       weights = ~pweight,
                       data    = het_analysis_data)
#value received
value_reg <- survey::svyglm(formula = value_total ~ treatment + TXimp + bl_value_total + bl_value_total_mi + factor(strata),family="gaussian", impDesignLong)
value_reg_0 <- lincom(value_reg, "treatment")
value_reg_1 <- lincom(value_reg, c("treatment + TXimp"))
value_reg_d <- lincom(value_reg, c("TXimp"))

#value received (WTA)
wta_reg <- survey::svyglm(formula = wta ~ treatment + TXimp + factor(strata),family="gaussian", impDesignLong)
wta_reg_0 <- lincom(wta_reg, "treatment")
wta_reg_1 <- lincom(wta_reg, c("treatment + TXimp"))
wta_reg_d <- lincom(wta_reg, c("TXimp"))

#access cost
access_reg <- survey::svyglm(formula = c_total_access_cost_adj_mar17 ~ treatment + TXimp + bl_access_cost + bl_access_cost_mi + factor(strata),family="gaussian", impDesign)
access_reg_0 <- lincom(access_reg, "treatment")
access_reg_1 <- lincom(access_reg, c("treatment + TXimp"))
access_reg_d <- lincom(access_reg, c("TXimp"))

#treatment results
no_imp <- rbind(value_reg_0, wta_reg_0)
no_imp <- rbind(no_imp, access_reg_0)

yes_imp <- rbind(value_reg_1, wta_reg_1)
yes_imp <- rbind(yes_imp, access_reg_1)

diff_imp <- rbind(value_reg_d, wta_reg_d)
diff_imp <- rbind(diff_imp, access_reg_d)

#create table
no_imp$sd <- (no_imp$`97.5 %` - no_imp$`2.5 %`)/3.92
yes_imp$sd <- (yes_imp$`97.5 %` - yes_imp$`2.5 %`)/3.92
diff_imp$sd <- (diff_imp$`97.5 %` - diff_imp$`2.5 %`)/3.92

temp1 <- survey::svyglm(formula = value_total ~ treatment + TXimp + bl_access_cost ,family="gaussian", impDesignLong)
temp2 <- survey::svyglm(formula = wta ~ treatment + TXimp + bl_access_cost ,family="gaussian", impDesignLong)
temp3 <- survey::svyglm(formula = c_total_access_cost_adj_mar17 ~ treatment + TXimp + bl_access_cost,family="gaussian", impDesign)

coefs <- list(c(0,no_imp$Estimate), c(0,yes_imp$Estimate), c(0,diff_imp$Estimate))
ses <- list(c(0,no_imp$sd), c(0,yes_imp$sd), c(0,diff_imp$sd))
results <- stargazer::stargazer(temp1, temp2,temp3, dep.var.caption  = "FPS is Above Median of Implementation Quality", header=FALSE, align=FALSE, float=F, dep.var.labels = c("No", "Yes", "Difference"),
                     model.numbers = FALSE, covariate.labels = c("Value Received (market prices)", "Value Received (WTA)", "Transaction Costs"), omit = c("Constant", "Observations"), coef = coefs, se = ses,
                     omit.table.layout = "sn", digits = 0, column.sep.width = "50pt", table.layout = "-ld-t-")
starpolishr::star_tex_write(results, file = paste0(OutputDir, "TableA_13.tex"))